import re
from collections import Counter

import jieba

# Segment a comment dump with jieba, count word frequencies, and write the
# 300 most common words (as "(word, count)" lines) to commentTwo.csv.

# Raw string guards against accidental backslash escapes in the Windows path;
# `with` guarantees the file handle is closed even on error.
with open(r'D:\python\豆瓣电影数据分析可视化系统\wordAna\commentOne.txt',
          'r', encoding='utf-8') as reader:
    strs = reader.read()

# Full mode (cut_all=True): jieba emits every word it can recognize,
# including overlapping segmentations.
word_list = jieba.cut(strs, cut_all=True)

# Keep only tokens of length >= 2 that contain no digits and no non-word
# characters (punctuation, whitespace, symbols).
new_word = [w for w in word_list
            if len(w) > 1
            and not re.search(r'\d', w)
            and not re.search(r'\W', w)]

# Counter counts in one O(n) pass; the previous
# `new_word.count(i)` loop over set(new_word) was O(n^2).
word_count = Counter(new_word)
print(dict(word_count))

# most_common(300) is already sorted by count descending and never raises
# IndexError when there are fewer than 300 distinct words (the old
# `for i in range(300)` loop did).
with open('commentTwo.csv', 'w', encoding='utf-8') as result:
    for item in word_count.most_common(300):
        print(item, file=result)